package com.shujia.spark.core

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object Demo10Distinct {
  def main(args: Array[String]): Unit = {

    // distinct：转换算子，用于去重
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName(this.getClass.getSimpleName.replace("$", ""))
    val sc = new SparkContext(conf)

    val stuRDD: RDD[String] = sc.textFile("spark/data/stu/students.txt")

    val stuRDD01: RDD[String] = stuRDD.sample(withReplacement = false, fraction = 0.01, seed = 1)
    val stuRDD02: RDD[String] = stuRDD.sample(withReplacement = false, fraction = 0.01, seed = 1)

    stuRDD01.union(stuRDD02).distinct().foreach(println)

  }

}
